### Load standardpackages
library(tidyverse) # Collection of all the good stuff like dplyr, ggplot2 ect.
library(magrittr) # For extra-piping operators (eg. %<>%)
library(tidygraph)
library(ggraph)
library(igraph)
In this session, you will learn:
We up to now already talked about different ways how networks can be constructed. Up to now, we mainly focussed on:
However, network analysis and modelling is also fully consistent with other structures, which are often a natural outcome of supervised or unsupervised ML exercises:
mtcars example.mtcars %>% head()
cars_pca <- mtcars[,c(1:7,10,11)] %>%
drop_na() %>%
prcomp(center = TRUE , scale = TRUE)
dist()) function.cars_dist <- cars_pca$x %>% dist(method = "euclidean")
La voila. Such a distance matrix representas a relational structure and can be modelled as a network.
g <- cars_dist %>%
as.matrix() %>%
as_tbl_graph(directed = FALSE)
g <- g %>% simplify() %>% as_tbl_graph()
g
# A tbl_graph: 32 nodes and 496 edges
#
# An undirected simple graph with 1 component
#
# Node Data: 32 x 1 (active)
name
<chr>
1 Mazda RX4
2 Mazda RX4 Wag
3 Datsun 710
4 Hornet 4 Drive
5 Hornet Sportabout
6 Valiant
# … with 26 more rows
#
# Edge Data: 496 x 3
from to weight
<int> <int> <dbl>
1 1 2 0.408
2 1 3 2.57
3 1 4 3.38
# … with 493 more rows
g <- g %E>%
mutate(weight = max(weight) - weight) %>%
filter(weight >= weight %>% quantile(0.75)) %N>%
filter(!node_is_isolated()) %>%
mutate(community = group_louvain(weights = weight) %>% factor())
Lets take a look!
set.seed(1337)
g %>% ggraph(layout = "nicely") +
geom_node_point(aes(col = community, size = centrality_degree(weights = weight))) +
geom_edge_link(aes(width = weight), alpha = 0.25) +
scale_edge_width(range = c(0.1, 2)) +
geom_node_text(aes(label = name, filter = percent_rank(centrality_degree(weights = weight)) > 0.5), repel = TRUE) +
theme_graph() +
theme(legend.position = 'bottom')
Hirarchical structures are obviously also relational. The difference is, that the connectivity structure tends to be constraint to other levels.
create_tree(20, 3) %>%
mutate(leaf = node_is_leaf(), root = node_is_root()) %>%
ggraph(layout = 'tree') +
geom_edge_diagonal() +
geom_node_point(aes(filter = leaf), colour = 'forestgreen', size = 10) +
geom_node_point(aes(filter = root), colour = 'firebrick', size = 10) +
theme_graph()
cars_hc <- cars_dist %>%
hclust(method = "ward.D2")
Again, this structure can be directly transfered to a graph object.
g <- cars_hc %>% as_tbl_graph()
g
# A tbl_graph: 63 nodes and 62 edges
#
# A rooted tree
#
# Node Data: 63 x 4 (active)
height leaf label members
<dbl> <lgl> <chr> <int>
1 0 TRUE "Porsche 914-2" 1
2 0 TRUE "Lotus Europa" 1
3 1.62 FALSE "" 2
4 0 TRUE "Honda Civic" 1
5 0 TRUE "Fiat X1-9" 1
6 0 TRUE "Fiat 128" 1
# … with 57 more rows
#
# Edge Data: 62 x 2
from to
<int> <int>
1 3 1
2 3 2
3 8 6
# … with 59 more rows
g %>% ggraph(layout = 'dendrogram') +
geom_edge_diagonal(aes(col = .N()$height[from])) +
geom_node_point(aes(col =height)) +
geom_node_text(aes(filter = leaf, label = label), angle=90, hjust=1, nudge_y=-0.1) +
theme_graph() +
theme(legend.position = 'none')
ylim(-0.6, NA)
<ScaleContinuousPosition>
Range:
Limits: 0 -- 1
## Demonstartion
set.seed(1337)
g <- create_bipartite(4, 10, directed = TRUE, mode = "out") %E>%
sample_n(15)
people <- c('Jesper', 'Pernille', 'Morten', 'Lise', 'Christian', 'Mette', 'Casper', 'Dorte', 'Jacob', 'Helle')
places <- c('Yoga House', 'Crossfit', 'Jazz Club', 'Jomfru Anne Gade')
g <- g %N>%
mutate(name = c(places, people))
g
# A tbl_graph: 14 nodes and 15 edges
#
# A directed acyclic simple graph with 3 components
#
# Node Data: 14 x 2 (active)
type name
<lgl> <chr>
1 FALSE Yoga House
2 FALSE Crossfit
3 FALSE Jazz Club
4 FALSE Jomfru Anne Gade
5 TRUE Jesper
6 TRUE Pernille
# … with 8 more rows
#
# Edge Data: 15 x 2
from to
<int> <int>
1 1 7
2 1 11
3 1 13
# … with 12 more rows
set.seed(1337)
p0 <- g %>% ggraph("bipartite") +
geom_edge_link(alpha = 0.25) +
geom_node_point(aes(col = type, size = centrality_degree(mode = 'all'))) +
geom_node_text(aes(label = name), repel = TRUE) +
theme_graph() +
theme(legend.position = 'none') +
labs(title = '2-mode network places-people')
p0
igraph function bipartite_projection to create a 2x 1-mode network out of it.g_projected <- g %>% bipartite_projection()
g_projected
$proj1
IGRAPH bbfb07a UNW- 4 5 -- Full bipartite graph
+ attr: name (g/c), name (v/c), weight (e/n)
+ edges from bbfb07a (vertex names):
[1] Yoga House--Jomfru Anne Gade Yoga House--Crossfit Yoga House--Jazz Club Crossfit --Jazz Club Crossfit --Jomfru Anne Gade
$proj2
IGRAPH ff97f74 UNW- 10 19 -- Full bipartite graph
+ attr: name (g/c), name (v/c), weight (e/n)
+ edges from ff97f74 (vertex names):
[1] Pernille--Mette Pernille--Casper Pernille--Dorte Pernille--Jacob Morten --Casper Morten --Jacob Morten --Helle Lise --Mette Lise --Dorte Lise --Jacob
[11] Lise --Helle Mette --Dorte Mette --Jacob Mette --Casper Casper --Jacob Casper --Helle Casper --Dorte Dorte --Jacob Jacob --Helle
g_places <- g_projected[['proj1']] %>% as_tbl_graph(directed = FALSE)
g_people <- g_projected[['proj2']] %>% as_tbl_graph(directed = FALSE)
set.seed(1337)
library(patchwork)
p1 <- g_places %>% ggraph(layout = "nicely") +
geom_node_point(aes(size = centrality_degree(weights = weight)), col = 'red') +
geom_edge_link(aes(width = weight), alpha = 0.25) +
scale_edge_width(range = c(0.1, 2)) +
geom_node_text(aes(label = name), repel = TRUE) +
theme_graph() +
theme(legend.position = 'none') +
labs(title = '1-mode network places')
p2 <- g_people %>% ggraph(layout = "nicely") +
geom_node_point(aes(size = centrality_degree(weights = weight)), col = 'skyblue2') +
geom_edge_link(aes(width = weight), alpha = 0.25) +
scale_edge_width(range = c(0.1, 2)) +
geom_node_text(aes(label = name), repel = TRUE) +
theme_graph() +
theme(legend.position = 'none') +
labs(title = '1-mode network people')
p0 / (p1 + p2)
el_2m <- g %E>%
mutate(from_name = .N()$name[from],
to_name = .N()$name[to]) %>%
as.tibble() %>%
select(to_name, from_name) %>%
rename(from = to_name, to = from_name)
el_2m
g_2m <- el_2m %>% as_tbl_graph(directed = TRUE) %N>%
mutate(type = name %in% (el_2m %>% pull(from)))
mat_2m <- el_2m %>%
table() %>%
as.matrix()
mat_2m
to
from Crossfit Jazz Club Jomfru Anne Gade Yoga House
Casper 0 0 1 1
Dorte 1 0 1 0
Helle 0 1 0 1
Jacob 1 0 1 1
Lise 1 1 0 0
Mette 1 0 1 0
Morten 0 0 0 1
Pernille 0 0 1 0
library(Matrix)
mat_2m %<>% Matrix(sparse = TRUE)
mat_2m
8 x 4 sparse Matrix of class "dgCMatrix"
to
from Crossfit Jazz Club Jomfru Anne Gade Yoga House
Casper . . 1 1
Dorte 1 . 1 .
Helle . 1 . 1
Jacob 1 . 1 1
Lise 1 1 . .
Mette 1 . 1 .
Morten . . . 1
Pernille . . 1 .
m %*% t(m))mat_people <- mat_2m %*% t(mat_2m)
mat_people
8 x 8 sparse Matrix of class "dgCMatrix"
Casper Dorte Helle Jacob Lise Mette Morten Pernille
Casper 2 1 1 2 . 1 1 1
Dorte 1 2 . 2 1 2 . 1
Helle 1 . 2 1 1 . 1 .
Jacob 2 2 1 3 1 2 1 1
Lise . 1 1 1 2 1 . .
Mette 1 2 . 2 1 2 . 1
Morten 1 . 1 1 . . 1 .
Pernille 1 1 . 1 . 1 . 1
t(m) %*% m)mat_places <- t(mat_2m) %*% mat_2m
mat_places
4 x 4 sparse Matrix of class "dgCMatrix"
Crossfit Jazz Club Jomfru Anne Gade Yoga House
Crossfit 4 1 3 1
Jazz Club 1 2 . 1
Jomfru Anne Gade 3 . 5 2
Yoga House 1 1 2 4
## Helper function
el_to_sparse_matrix <- function(data, # the edgelist
mode_1, # which variable indicates mode 1
mode_2, # which variable indicates mode 2
projection = 'none' # If a pojection should be done, possible is 'none', 'mode1', 'mode2'
){
# Define inputs
i_input <- data %>% pull({{mode_1}})
j_input <- data %>% pull({{mode_2}})
require(Matrix)
mat <- spMatrix(nrow = i_input %>% n_distinct(),
ncol = j_input %>% n_distinct(),
i = i_input %>% factor() %>% as.numeric(),
j = j_input %>% factor() %>% as.numeric(),
x = rep(1, i_input %>% length() ) )
row.names(mat) <- i_input %>% factor() %>% levels()
colnames(mat) <- j_input %>% factor() %>% levels()
# Projection if necessary
if(projection == 'mode1'){mat %<>% tcrossprod()}
if(projection == 'mode2'){mat %<>% crossprod()}
return(mat)
}
mat_people <- el_2m %>% el_to_sparse_matrix(from, to, projection = 'mode1')
mat_places <- el_2m %>% el_to_sparse_matrix(from, to, projection = 'mode2')
mat_people
8 x 8 sparse Matrix of class "dsCMatrix"
Casper Dorte Helle Jacob Lise Mette Morten Pernille
Casper 2 1 1 2 . 1 1 1
Dorte 1 2 . 2 1 2 . 1
Helle 1 . 2 1 1 . 1 .
Jacob 2 2 1 3 1 2 1 1
Lise . 1 1 1 2 1 . .
Mette 1 2 . 2 1 2 . 1
Morten 1 . 1 1 . . 1 .
Pernille 1 1 . 1 . 1 . 1
mat_places
4 x 4 sparse Matrix of class "dsCMatrix"
Crossfit Jazz Club Jomfru Anne Gade Yoga House
Crossfit 4 1 3 1
Jazz Club 1 2 . 1
Jomfru Anne Gade 3 . 5 2
Yoga House 1 1 2 4
el_people <- el_2m %>% left_join(el_2m, by = 'to') %>%
select(-to) %>%
rename(from = from.x, to = from.y) %>%
filter(from != to) %>%
count(from, to, name = 'weight')
el_people
el_places <- el_2m %>% left_join(el_2m, by = 'from') %>%
select(-from) %>%
rename(from = to.x, to = to.y) %>%
filter(from != to) %>%
count(from, to, name = 'weight')
el_places
Lets talk about bibliographic networks. In short, that are networks between documents which cite each others. That can be (commonly) academic publications, but also patents or policy reports. Conceptually, we can see them as 2 mode networks, between articles and their reference. That helps us to apply some interesting metrics, such as:
Interestingly, different projections of this 2-mode network give the whole resulting 1-mode network a different meaning.
We will here do a brief bibliometric network analysis.
While there exist specialized packages to do it more conveniently (eg. bibliometrix), we will for mximum clarity construct everything somewhat by hand.
I will illustrate more in detail in the following. The example is based on some own work, where i here in very simple way recreate some parts of the analysis.
Rakas, M., & Hain, D. S. (2019). The state of innovation system research: What happens beneath the surface?. Research Policy, 45 (9). DOI: https://doi.org/10.1016/j.respol.2019.04.011
network analysis in their title, abstract, or keywords.TITLE-ABS-KEY ( "network analysis" ) AND ( LIMIT-TO ( DOCTYPE , "ar" ) OR LIMIT-TO ( DOCTYPE , "cp" ) ) AND ( LIMIT-TO ( LANGUAGE , "English" ) ) AND ( LIMIT-TO ( SRCTYPE , "j" ) OR LIMIT-TO ( SRCTYPE , "p" ) ) AND ( LIMIT-TO ( PUBYEAR , 2021 ) OR LIMIT-TO ( PUBYEAR , 2020 ) OR LIMIT-TO ( PUBYEAR , 2019 ) OR LIMIT-TO ( PUBYEAR , 2018 ) OR LIMIT-TO ( PUBYEAR , 2017 ) OR LIMIT-TO ( PUBYEAR , 2016 ) OR LIMIT-TO ( PUBYEAR , 2015 ) )csv. We select al possible fields to download.rm(list=ls())
data <- read_csv('https://github.com/SDS-AAU/SDS-master/raw/master/00_data/networks_bibliometrics/biblio_nw.csv')
data %>%
glimpse()
Rows: 2,000
Columns: 43
$ Authors <chr> "Wang D., Cui P., Zhu W.", "Thorsson V., Gibbs D.L., Brown S.D., Wolf D., Bortone D.S., Ou Yang T.-H., Porta-Pardo E., Gao G.F., Plaisier C.L…
$ `Author(s) ID` <chr> "56780729900;34568700100;7404232311;", "6602742809;57189045500;57199191934;7402650510;11640638800;57194758425;56422456700;57200880842;1360507…
$ Title <chr> "Structural deep network embedding", "The Immune Landscape of Cancer", "A Human Interactome in Three Quantitative Dimensions Organized by Sto…
$ Year <dbl> 2016, 2018, 2015, 2015, 2015, 2015, 2016, 2015, 2015, 2015, 2015, 2017, 2016, 2015, 2015, 2017, 2017, 2016, 2016, 2016, 2015, 2015, 2016, 201…
$ `Source title` <chr> "Proceedings of the ACM SIGKDD International Conference on Knowledge Discovery and Data Mining", "Immunity", "Cell", "Proteomics", "IJCAI Int…
$ Volume <chr> "13-17-August-2016", "48", "163", "15", "2015-January", "162", "428", "47", "2015", "9", "21", "356", "530", "9", "112", "545", "2", "354", "…
$ Issue <chr> NA, "4", "3", "15", NA, "2", "4", "2", NA, "11", "1", "6343", "7590", "JUNE", "51", "7652", NA, "6314", "6295", "4", "3", "6", "6", NA, "7540…
$ `Art. No.` <chr> NA, NA, NA, NA, NA, "8281", NA, NA, "bav028", NA, NA, NA, NA, "386", NA, NA, "16270", NA, NA, NA, NA, NA, "7152988", NA, NA, NA, NA, NA, "753…
$ `Page start` <chr> "1225", "812", "712", "2597", "2111", "375", "688", "106", NA, "2490", "37", "1140", "307", NA, "15672", "48", NA, "847", "163", "694", "176"…
$ `Page end` <chr> "1234", "830.e14", "723", "2601", "2117", "390", "692", "114", NA, "2502", "46", "1144", "312", NA, "15677", "53", NA, "850", "166", "707", "…
$ `Page count` <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 16, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ `Cited by` <dbl> 781, 607, 507, 468, 418, 404, 378, 377, 363, 356, 354, 347, 347, 328, 322, 313, 306, 297, 293, 291, 278, 277, 274, 271, 271, 254, 252, 252, 2…
$ DOI <chr> "10.1145/2939672.2939753", "10.1016/j.immuni.2018.03.023", "10.1016/j.cell.2015.09.053", "10.1002/pmic.201400515", NA, "10.1016/j.cell.2015.0…
$ Link <chr> "https://www.scopus.com/inward/record.uri?eid=2-s2.0-84985034266&doi=10.1145%2f2939672.2939753&partnerID=40&md5=c1d4a7ce2a487319f557a35d2453f…
$ Affiliations <chr> "Tsinghua National Laboratory for Information Science and Technology, Department of Computer Science and Technology, Tsinghua University, Bei…
$ `Authors with affiliations` <chr> "Wang, D., Tsinghua National Laboratory for Information Science and Technology, Department of Computer Science and Technology, Tsinghua Unive…
$ Abstract <chr> "Network embedding is an important method to learn low-dimensional representations of vertexes in networks, aiming to capture and preserve th…
$ `Author Keywords` <chr> "Deep learning; Network analysis; Network embedding", "cancer genomics; immune subtypes; immuno-oncology; immunomodulatory; immunotherapy; in…
$ `Index Keywords` <chr> "Classification (of information); Data mining; Electric network analysis; Functions; Linear network analysis; Deep learning; Global and local…
$ `Funding Details` <chr> "National Key Basic Research Program For Youth: 2015CB352300\n\nNational Natural Science Foundation of China, NSFC: 61531006, 61370022, 61472…
$ `Funding Text 1` <chr> "This work was supported by National Program on Key Basic Research Project, No. 2015CB352300; National Natural Science Foundation of China, N…
$ `Funding Text 2` <chr> NA, "Michael Seiler, Peter G. Smith, Ping Zhu, Silvia Buonamici, and Lihua Yu are employees of H3 Biomedicine, Inc. Parts of this work are th…
$ `Funding Text 3` <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ `Funding Text 4` <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ `Funding Text 5` <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ `Funding Text 6` <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ `Funding Text 7` <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ References <chr> "Belkin, M., Niyogi, P., Laplacian eigenmaps for dimensionality reduction and data representation (2003) Neural Computation, 15 (6), pp. 1373…
$ `Correspondence Address` <chr> NA, NA, "Hyman, A.A.; Max Planck Institute of Molecular Cell Biology and GeneticsGermany; email: hyman@mpi-cbg.de", "Mathivanan, S.; Departme…
$ Editors <chr> NA, NA, NA, NA, "Wooldridge M.Yang Q.", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ Sponsors <chr> "ACM SIGKDD;ACM SIGMOD", NA, NA, NA, "Alibaba.com;Department of Computer Science and Engineering at Universidad Nacional del Sur;Department o…
$ Publisher <chr> "Association for Computing Machinery", "Cell Press", "Cell Press", "Wiley-VCH Verlag", "International Joint Conferences on Artificial Intelli…
$ `Conference name` <chr> "22nd ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, KDD 2016", NA, NA, NA, "24th International Joint Conference…
$ `Conference date` <chr> "13 August 2016 through 17 August 2016", NA, NA, NA, "25 July 2015 through 31 July 2015", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ `Conference location` <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ `Conference code` <dbl> 123286, NA, NA, NA, 116754, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 132417, NA, NA, NA, NA, NA, NA, N…
$ ISSN <chr> NA, "10747613", "00928674", "16159853", "10450823", "00928674", "00222836", "10614036", "17580463", "17517362", "10788956", "00368075", "0028…
$ ISBN <chr> "9781450342322", NA, NA, NA, "9781577357384", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "9781538604571"…
$ CODEN <chr> NA, "IUNIE", "CELLB", "PROTC", NA, "CELLB", "JMOBA", "NGENE", NA, NA, "NAMEF", "SCIEA", "NATUA", NA, "PNASA", "NATUA", NA, "SCIEA", "SCIEA", …
$ `Abbreviated Source Title` <chr> "Proc. ACM SIGKDD Int. Conf. Knowl. Discov. Data Min.", "Immunity", "Cell", "Proteomics", "IJCAI Int. Joint Conf. Artif. Intell.", "Cell", "J…
$ `Document Type` <chr> "Conference Paper", "Article", "Article", "Article", "Conference Paper", "Article", "Article", "Article", "Article", "Article", "Article", "A…
$ Source <chr> "Scopus", "Scopus", "Scopus", "Scopus", "Scopus", "Scopus", "Scopus", "Scopus", "Scopus", "Scopus", "Scopus", "Scopus", "Scopus", "Scopus", "…
$ EID <chr> "2-s2.0-84985034266", "2-s2.0-85044934017", "2-s2.0-84948067587", "2-s2.0-84938748671", "2-s2.0-84949803524", "2-s2.0-84937212591", "2-s2.0-8…
data %<>%
select(EID, Authors, `Author(s) ID`, Title, `Abbreviated Source Title`, Year, Affiliations, `Author Keywords`, `Cited by`, Abstract, References) %>%
rename(author = Authors,
author_id = `Author(s) ID`,
title = Title,
journal = `Abbreviated Source Title`,
year = Year,
affiliation = Affiliations,
keywords = `Author Keywords`,
citations = `Cited by`,
Abstract = Abstract,
references = References)
data[2, 'author']
data[2, 'author_id']
data[2, 'affiliation']
data[2, "keywords"]
data[2, "references"]
data %<>%
mutate(author = author %>% str_split(', '),
author_id = author_id %>% str_split(';'),
affiliation = affiliation %>% str_split(';'),
keywords = keywords %>% str_split('; '),
references = references %>% str_split('; '))
data %>% select(title, author, citations) %>%
unnest(author) %>%
head()
data %>% select(title, author, citations) %>%
unnest(author) %>%
group_by(author) %>%
summarise(n = n(),
citations = citations %>% sum(na.rm = TRUE)) %>%
arrange(desc(citations)) %>%
head(10)
I will now transfer them to an article \(\rightarrow\) reference edgelist.
el_2m <- data %>% select(EID, references) %>% unnest(references)
el_2m %>% head()
el_bib <- el_2m %>% left_join(el_2m, by = 'references') %>%
rename(from = EID.x,
to = EID.y) %>%
select(from, to) %>%
filter(from != to) %>%
count(from, to, name = 'weight')
el_bib %>% head()
el_bib %>%
ggplot(aes(x = weight)) +
geom_histogram()
el_bib %<>%
filter(weight >=2)
el_bib %>%
ggplot(aes(x = weight)) +
geom_histogram()
g_bib <- el_bib %>% as_tbl_graph(directed = FALSE) %>%
igraph::simplify() %>%
as_tbl_graph(directed = FALSE)
g_bib
# A tbl_graph: 684 nodes and 1046 edges
#
# An undirected simple graph with 109 components
#
# Node Data: 684 x 1 (active)
name
<chr>
1 2-s2.0-84909633782
2 2-s2.0-84910060076
3 2-s2.0-84912573275
4 2-s2.0-84918503641
5 2-s2.0-84918774371
6 2-s2.0-84919882800
# … with 678 more rows
#
# Edge Data: 1,046 x 3
from to weight
<int> <int> <dbl>
1 1 94 4
2 1 162 6
3 1 255 14
# … with 1,043 more rows
g_bib <- g_bib %N>%
filter(!node_is_isolated()) %N>%
filter(percent_rank(centrality_degree(weights = weight)) >= 0.5)
g_bib <- g_bib %N>%
mutate(com = group_louvain(weights = weight))
morph() functions of tidygraph, which basically let you apply group_by style operations on graph structures, where graph calculations are execited on subgraphs.g_bib <- g_bib %N>%
morph(to_split, com) %>%
mutate(cent_dgr_int = centrality_degree(weights = weight)) %>%
mutate(com_center = cent_dgr_int == max(cent_dgr_int)) %>%
unmorph()
g_bib %N>%
as_tibble() %>%
count(com, sort = TRUE)
g_bib <- g_bib %N>%
add_count(com, name = 'com_n') %>%
mutate(com = ifelse(com_n >= 20, com, NA) ) %>%
select(-com_n)
g_bib <- g_bib %N>%
left_join(data %>% select(EID, title, journal, year, citations), by = c('name' = 'EID')) %>%
mutate(title = title %>% str_trunc(30))
set.seed(1337)
g_bib %>%
ggraph(layout = 'graphopt') +
geom_edge_link(aes(width = weight,
color = .N()$com[from] %>% as.factor()), # Notice that
alpha = 0.5,
show.legend = FALSE) +
scale_edge_width(range = c(0.5, 2)) +
geom_node_point(aes(color = com %>% as.factor(),
size = centrality_degree(weight = weight),
alpha = citations)) +
geom_node_text(aes(label = title, filter = com_center == TRUE), repel = TRUE) +
theme_graph() +
theme(legend.position = 'bottom') +
labs(title = 'Bibliographic Coupling Network',
subtitle = 'Network Analysis 2015-2020',
color = 'Community',
size = 'Degree',
alpha = 'Citations')
data %>%
select(EID, author, year, title, journal, citations) %>%
inner_join(g_bib %N>% as_tibble() %>% select(name, com, cent_dgr_int), by = c('EID' = 'name')) %>%
group_by(com) %>%
arrange(desc(cent_dgr_int)) %>%
slice(1:10) %>%
ungroup() %>%
select(com, title, cent_dgr_int, citations) %>%
mutate(title = title %>% str_trunc(75))
el_cit <- el_2m %>% left_join(el_2m, by = 'EID') %>%
rename(from = references.x,
to = references.y) %>%
select(from, to) %>%
filter(from != to) %>%
count(from, to, name = 'weight')
el_cit %<>%
filter(weight >=3,
!str_detect(from, '\\:\\, '),
!str_detect(to, '\\:\\, '),
str_length(from) > 50,
str_length(to) > 50
)
el_cit %>%
ggplot(aes(x = weight)) +
geom_histogram()
g_cit <- el_cit %>% as_tbl_graph(directed = FALSE) %>%
igraph::simplify() %>%
as_tbl_graph(directed = FALSE) %N>%
filter(!node_is_isolated()) %N>%
filter(percent_rank(centrality_degree(weights = weight)) >= 0.5) %N>%
mutate(com = group_louvain(weights = weight)) %N>%
morph(to_split, com) %>%
mutate(cent_dgr_int = centrality_degree(weights = weight)) %>%
mutate(com_center = cent_dgr_int == max(cent_dgr_int)) %>%
unmorph()
g_cit %N>%
as_tibble() %>%
count(com, sort = TRUE)
g_cit <- g_cit %N>%
add_count(com, name = 'com_n') %>%
mutate(com = ifelse(com_n >= 10, com, NA) ) %>%
select(-com_n)
Lets take a look at the network.
set.seed(1337)
g_cit %N>%
mutate(name = name %>% str_trunc(75)) %>%
ggraph(layout = 'graphopt') +
geom_edge_link(aes(width = weight,
color = .N()$com[from] %>% as.factor()), # Notice that
alpha = 0.5,
show.legend = FALSE) +
scale_edge_width(range = c(0.5, 2)) +
geom_node_point(aes(color = com %>% as.factor(),
size = centrality_degree(weight = weight),
alpha = cent_dgr_int)) +
geom_node_text(aes(label = name, filter = com_center == TRUE & percent_rank(cent_dgr_int) > 0.80 ), repel = TRUE) +
theme_graph() +
theme(legend.position = 'bottom') +
labs(title = 'Bibliographic Coupling Network',
subtitle = 'Network Analysis 2015-2020',
color = 'Community',
size = 'Degree',
alpha = 'Citations')
g_cit %N>%
as_tibble() %>%
group_by(com) %>%
arrange(desc(cent_dgr_int)) %>%
slice(1:10) %>%
ungroup() %>%
mutate(name = name %>% str_trunc(75)) %>%
select(com, name, cent_dgr_int)
el_joint <- el_2m %>%
inner_join(g_bib %N>% as_tibble() %>% select(name, com) %>% drop_na(), by = c('EID' = 'name')) %>%
inner_join(g_cit %N>% as_tibble() %>% select(name, com) %>% drop_na(), by = c('references' = 'name')) %>%
rename(from = com.x, to = com.y) %>%
count(from, to, name = 'weight')
el_joint
el_joint %<>%
mutate(from = paste('bib', from, sep = '_'),
to = paste('cit', to, sep = '_'))
g_joint <- el_joint %>% as_tbl_graph(directed = TRUE)
g_joint <- g_joint %N>%
mutate(type = name %>% str_detect('bib'))
g_joint %>% ggraph("bipartite") +
geom_edge_link(alpha = 0.25) +
geom_node_point(aes(col = type, size = centrality_degree(mode = 'all'))) +
geom_node_text(aes(label = name), repel = TRUE) +
theme_graph() +
theme(legend.position = 'none')
Please do Exercise 1 in the corresponding section on Github. This time you are about to do your own bibliographic analysis!
Paper mentioned in the text
Other own work dealing with 2-mode networks
sessionInfo()
R version 4.0.2 (2020-06-22)
Platform: x86_64-apple-darwin17.0 (64-bit)
Running under: macOS Catalina 10.15.7
Matrix products: default
BLAS: /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] Matrix_1.2-18 patchwork_1.0.1 igraph_1.2.6 ggraph_2.0.3 tidygraph_1.2.0 magrittr_1.5 forcats_0.5.0 stringr_1.4.0 dplyr_1.0.2 purrr_0.3.4 readr_1.4.0
[12] tidyr_1.1.2 tibble_3.0.4 ggplot2_3.3.2 tidyverse_1.3.0 knitr_1.30
loaded via a namespace (and not attached):
[1] ggrepel_0.8.2 Rcpp_1.0.5 lubridate_1.7.9 lattice_0.20-41 assertthat_0.2.1 digest_0.6.25 utf8_1.1.4 ggforce_0.3.2 R6_2.4.1
[10] cellranger_1.1.0 backports_1.1.10 reprex_0.3.0 httr_1.4.2 pillar_1.4.6 rlang_0.4.8 curl_4.3 readxl_1.3.1 rstudioapi_0.11
[19] blob_1.2.1 labeling_0.3 polyclip_1.10-0 munsell_0.5.0 broom_0.7.1 compiler_4.0.2 modelr_0.1.8 xfun_0.18 pkgconfig_2.0.3
[28] tidyselect_1.1.0 gridExtra_2.3 graphlayouts_0.7.0 fansi_0.4.1 viridisLite_0.3.0 crayon_1.3.4 dbplyr_1.4.4 withr_2.3.0 MASS_7.3-53
[37] grid_4.0.2 jsonlite_1.7.1 gtable_0.3.0 lifecycle_0.2.0 DBI_1.1.0 scales_1.1.1 cli_2.1.0 stringi_1.5.3 farver_2.0.3
[46] viridis_0.5.1 fs_1.5.0 xml2_1.3.2 ellipsis_0.3.1 generics_0.0.2 vctrs_0.3.4 tools_4.0.2 glue_1.4.2 tweenr_1.0.1
[55] hms_0.5.3 yaml_2.2.1 colorspace_1.4-1 rvest_0.3.6 haven_2.3.1